{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "209da03b",
   "metadata": {},
   "source": [
    "# Yolov8 format\n",
    "Yolov8 wants following format: train, val and test folder. For each have a images folder and a labels folder. For each image in the images folder there is a .txt file with the same name and the corresponding labels. To get labels for segmentation look at JSON2YOLO github <https://github.com/ultralytics/JSON2YOLO>\n",
    "\n",
    "Change paths accordingly for individual use"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "81e532c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get all file names\n",
    "\n",
    "import os\n",
    "image_directory = 'data/images'\n",
    " \n",
    "# get all image file names\n",
    "filenames = []\n",
    "for filename in os.listdir(image_directory):\n",
    "    filenames.append(filename[:-4]) # last 4 characters are '.jpg'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8b44590",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Originally data of gauge needle and gauge face seperated. With this we merge the labels.\n",
    "# 0 corresponds to gauge face, 1 to gauge needle\n",
    "\n",
    "def merge_labels(gauge_face_path, gauge_needle_path, dst_path):\n",
    "    for filename in filenames:\n",
    "        with open(dst_path + filename+'.txt', 'w') as outfile:\n",
    "            with open(gauge_face_path+filename+'.txt') as infile:\n",
    "                outfile.write(infile.read())\n",
    "            with open(gauge_needle_path+filename+'.txt') as infile:\n",
    "                for line in infile:\n",
    "                    line = '1' + line[1:]\n",
    "                    outfile.write(line)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "83c9cb0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "gauge_face_path = 'data/gauge_face/annotations/bbox_labels_yolo/'\n",
    "gauge_needle_path = 'data/gauge_needle/annotations/bbox_labels_yolo/'\n",
    "path_bbox_labels = 'data/bbox_labels/'\n",
    "merge_labels(gauge_face_path, gauge_needle_path, path_bbox_labels)\n",
    "gauge_face_path = 'data/gauge_face/annotations/segmentation_labels_yolo/'\n",
    "gauge_needle_path = 'data/gauge_needle/annotations/segmentation_labels_yolo/'\n",
    "path_segmentation_labels = 'data/segmentation_labels/'\n",
    "merge_labels(gauge_face_path, gauge_needle_path, path_segmentation_labels)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1b11c50b",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# split images into train, val and test set\n",
    "\n",
    "import random\n",
    "\n",
    "def split_dataset(filenames):\n",
    "    # Assuming you have a list of filenames called \"all_filenames\"\n",
    "    random.shuffle(filenames)\n",
    "\n",
    "    # Calculate the size of each set\n",
    "    num_files = len(filenames)\n",
    "    num_train = int(0.8 * num_files)  # 80% for training\n",
    "    num_val = int(0.1 * num_files)   # 10% for validation\n",
    "    num_test = num_files - num_train - num_val  # remaining 10% for test\n",
    "\n",
    "    # Split the list into three sets\n",
    "    train_filenames = filenames[:num_train]\n",
    "    val_filenames = filenames[num_train:num_train+num_val]\n",
    "    test_filenames = filenames[num_train+num_val:]\n",
    "\n",
    "    # Print the sizes of each set\n",
    "    print(f\"Number of files in train set: {len(train_filenames)}\")\n",
    "    print(f\"Number of files in validation set: {len(val_filenames)}\")\n",
    "    print(f\"Number of files in test set: {len(test_filenames)}\")\n",
    "    \n",
    "    return train_filenames, val_filenames, test_filenames\n",
    "\n",
    "\n",
    "train_filenames, val_filenames, test_filenames = split_dataset(filenames)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "040cfa28",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create folder structure\n",
    "import os\n",
    "dir_base = ['segmentation','detection']\n",
    "modes = ['train', 'val', 'test']\n",
    "for base in dir_base:\n",
    "    for mode in modes:\n",
    "        path = 'data/' + base + '/' + mode +'/images'\n",
    "        os.makedirs(path, exist_ok=True)\n",
    "        path = 'data/' + base + '/' + mode +'/labels'\n",
    "        os.makedirs(path, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "edb46dcc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import shutil\n",
    "\n",
    "#copy image and label file of given file name to their corresponding folders in new folderstructure\n",
    "def copy_pair(src_dir, target_dir, file_name, mode):\n",
    "    src = image_directory + '/' + file_name + \".jpg\"\n",
    "    dst = target_dir + '/' + mode +'/images/' + file_name + \".jpg\"\n",
    "    shutil.copy2(src, dst)\n",
    "    src = src_dir + '/' + file_name + \".txt\"\n",
    "    dst = target_dir + '/' + mode +'/labels/' + file_name + \".txt\"\n",
    "    shutil.copy2(src, dst)\n",
    "\n",
    "#for each set copy all labels and images of this set to corresponding \n",
    "def copy_split(src_dir, target_dir):\n",
    "    for name in train_filenames:\n",
    "        copy_pair(src_dir, target_dir, name, 'train')\n",
    "    for name in val_filenames:\n",
    "        copy_pair(src_dir, target_dir, name, 'val')\n",
    "    for name in test_filenames:\n",
    "        copy_pair(src_dir, target_dir, name, 'test')\n",
    "\n",
    "copy_split(path_segmentation_labels, 'data/segmentation')\n",
    "copy_split(path_bbox_labels, 'data/detection')\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
